package com.sunzm.spark.sql

import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Reads a Parquet file and prints its rows as JSON strings.
 *
 * Usage: the first command-line argument, if present, is the path to the
 * Parquet file; otherwise a default local path is used (backward compatible
 * with the original hard-coded path).
 *
 * @author Administrator
 * @version 1.0
 * @date 2021-08-12 15:49
 */
object ParquetToJSON {

  /** Default input path, used when no argument is supplied. */
  private val DefaultInputPath: String =
    "C:/mydata/part-00000-d1fb682d-0da9-4c11-acf2-b1f9b588a4bd.snappy.parquet"

  def main(args: Array[String]): Unit = {
    // Allow the caller to override the input file; fall back to the original path.
    val inputPath: String = args.headOption.getOrElse(DefaultInputPath)

    val spark: SparkSession = SparkSession
      .builder()
      // Strip the trailing "$" that Scala appends to object class names.
      .appName(s"${this.getClass.getSimpleName.stripSuffix("$")}")
      .master("local[*]")
      .config("spark.default.parallelism", 8)
      .config("spark.sql.shuffle.partitions", 8)
      .getOrCreate()

    // Ensure the session is closed even if reading or showing fails.
    try {
      val dataDF: DataFrame = spark.read.parquet(inputPath)

      // Render each row as a JSON string; truncate = false prints full values.
      dataDF.toJSON.show(numRows = 10, truncate = false)
    } finally {
      spark.close()
    }
  }
}
